Coronavirus is a family of viruses that are named after their spiky crown. The novel coronavirus, also known as SARS-CoV-2, is a contagious respiratory virus that first reported in Wuhan, China. On 2/11/2020, the World Health Organization designated the name COVID-19 for the disease caused by the novel coronavirus. This notebook aims at exploring COVID-19 through data analysis and projections.



Data is provided by Johns Hopkins University
Learn more from the WHO
Learn more from the CDC
Map Visualizations from JHU CCSE Dashboard
Source code is on my Github
Prepared By Santosh Yadav


Feel free to provide me with feedbacks.
Last update: 7/3/2020 3:13 PM ET
New Updates: data update for 7/2

*Prediction models are trained from a later starting date (3/13/2020). Therefore, earlier dates might not be accurate.

Source: https://www.statnews.com/wp-content/uploads/2020/02/Coronavirus-CDC-645x645.jpg
Keep strong, world! Stay safe and healthy.


Sections

In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import matplotlib.colors as mcolors
import pandas as pd 
import random
import math
import time
import xgboost
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
import datetime
import operator 
plt.style.use('fivethirtyeight')
%matplotlib inline 

Import the data (make sure you update this on a daily basis)

In [2]:
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
deaths_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
recoveries_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
latest_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-02-2020.csv')
us_medical_data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports_us/07-02-2020.csv')
In [3]:
latest_data.head()
Out[3]:
FIPS Admin2 Province_State Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active Combined_Key Incidence_Rate Case-Fatality_Ratio
0 45001.0 Abbeville South Carolina US 2020-07-03 04:33:54 34.223334 -82.461707 119 0 0 119 Abbeville, South Carolina, US 485.179598 0.000000
1 22001.0 Acadia Louisiana US 2020-07-03 04:33:54 30.295065 -92.414197 945 37 0 908 Acadia, Louisiana, US 1523.088081 3.915344
2 51001.0 Accomack Virginia US 2020-07-03 04:33:54 37.767072 -75.632346 1042 14 0 1028 Accomack, Virginia, US 3224.408962 1.343570
3 16001.0 Ada Idaho US 2020-07-03 04:33:54 43.452658 -116.241552 2336 23 0 2313 Ada, Idaho, US 485.062927 0.984589
4 19001.0 Adair Iowa US 2020-07-03 04:33:54 41.330756 -94.471059 15 0 0 15 Adair, Iowa, US 209.731544 0.000000
In [4]:
confirmed_df.head()
Out[4]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 6/23/20 6/24/20 6/25/20 6/26/20 6/27/20 6/28/20 6/29/20 6/30/20 7/1/20 7/2/20
0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 29481 29640 30175 30451 30616 30967 31238 31517 31836 32022
1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 2047 2114 2192 2269 2330 2402 2466 2535 2580 2662
2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 12076 12248 12445 12685 12968 13273 13571 13907 14272 14657
3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 855 855 855 855 855 855 855 855 855 855
4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 189 197 212 212 259 267 276 284 291 315

5 rows × 167 columns

In [5]:
us_medical_data.head()
Out[5]:
Province_State Country_Region Last_Update Lat Long_ Confirmed Deaths Recovered Active FIPS Incident_Rate People_Tested People_Hospitalized Mortality_Rate UID ISO3 Testing_Rate Hospitalization_Rate
0 Alabama US 2020-07-03 04:34:05 32.3182 -86.9023 40111 985 22082.0 17044.0 1 818.060098 419728.0 2835.0 2.455685 84000001 USA 8560.313347 7.067887
1 Alaska US 2020-07-03 04:34:05 61.3707 -152.4044 1014 14 535.0 465.0 2 138.610748 115909.0 NaN 1.380671 84000002 USA 15844.411485 NaN
2 American Samoa US 2020-07-03 04:34:05 -14.2710 -170.1320 0 0 NaN 0.0 60 0.000000 696.0 NaN NaN 16 ASM 1250.876152 NaN
3 Arizona US 2020-07-03 04:34:05 33.7298 -111.4312 87445 1764 10137.0 75544.0 4 1201.379309 560839.0 4916.0 2.017268 84000004 USA 7705.190352 5.621819
4 Arkansas US 2020-07-03 04:34:05 34.9697 -92.3731 22075 279 15698.0 6098.0 5 731.492171 323987.0 1477.0 1.263873 84000005 USA 10735.852958 6.690827
In [6]:
cols = confirmed_df.keys()

Get all the dates for the outbreak

In [7]:
confirmed = confirmed_df.loc[:, cols[4]:cols[-1]]
deaths = deaths_df.loc[:, cols[4]:cols[-1]]
recoveries = recoveries_df.loc[:, cols[4]:cols[-1]]
In [8]:
dates = confirmed.keys()
world_cases = []
total_deaths = [] 
mortality_rate = []
recovery_rate = [] 
total_recovered = [] 
total_active = [] 

china_cases = [] 
italy_cases = []
us_cases = [] 
spain_cases = [] 
france_cases = [] 
germany_cases = [] 
uk_cases = [] 
russia_cases = [] 
brazil_cases = []
india_cases = []
peru_cases = [] 

china_deaths = [] 
italy_deaths = []
us_deaths = [] 
spain_deaths = [] 
france_deaths = [] 
germany_deaths = [] 
uk_deaths = [] 
russia_deaths = []
brazil_deaths = [] 
india_deaths = []
peru_deaths = []

china_recoveries = [] 
italy_recoveries = []
us_recoveries = [] 
spain_recoveries = [] 
france_recoveries = [] 
germany_recoveries = [] 
uk_recoveries = [] 
russia_recoveries = [] 
brazil_recoveries = [] 
india_recoveries = [] 
peru_recoveries = [] 

for i in dates:
    confirmed_sum = confirmed[i].sum()
    death_sum = deaths[i].sum()
    recovered_sum = recoveries[i].sum()
    
    # confirmed, deaths, recovered, and active
    world_cases.append(confirmed_sum)
    total_deaths.append(death_sum)
    total_recovered.append(recovered_sum)
    total_active.append(confirmed_sum-death_sum-recovered_sum)
    
    # calculate rates
    mortality_rate.append(death_sum/confirmed_sum)
    recovery_rate.append(recovered_sum/confirmed_sum)

    # case studies 
    china_cases.append(confirmed_df[confirmed_df['Country/Region']=='China'][i].sum())
    italy_cases.append(confirmed_df[confirmed_df['Country/Region']=='Italy'][i].sum())
    us_cases.append(confirmed_df[confirmed_df['Country/Region']=='US'][i].sum())
    spain_cases.append(confirmed_df[confirmed_df['Country/Region']=='Spain'][i].sum())
    france_cases.append(confirmed_df[confirmed_df['Country/Region']=='France'][i].sum())
    germany_cases.append(confirmed_df[confirmed_df['Country/Region']=='Germany'][i].sum())
    uk_cases.append(confirmed_df[confirmed_df['Country/Region']=='United Kingdom'][i].sum())
    russia_cases.append(confirmed_df[confirmed_df['Country/Region']=='Russia'][i].sum())
    brazil_cases.append(confirmed_df[confirmed_df['Country/Region']=='Brazil'][i].sum())
    india_cases.append(confirmed_df[confirmed_df['Country/Region']=='India'][i].sum())
    peru_cases.append(confirmed_df[confirmed_df['Country/Region']=='Peru'][i].sum())
    
    
    china_deaths.append(deaths_df[deaths_df['Country/Region']=='China'][i].sum())
    italy_deaths.append(deaths_df[deaths_df['Country/Region']=='Italy'][i].sum())
    us_deaths.append(deaths_df[deaths_df['Country/Region']=='US'][i].sum())
    spain_deaths.append(deaths_df[deaths_df['Country/Region']=='Spain'][i].sum())
    france_deaths.append(deaths_df[deaths_df['Country/Region']=='France'][i].sum())
    germany_deaths.append(deaths_df[deaths_df['Country/Region']=='Germany'][i].sum())
    uk_deaths.append(deaths_df[deaths_df['Country/Region']=='United Kingdom'][i].sum())
    russia_deaths.append(deaths_df[deaths_df['Country/Region']=='Russia'][i].sum())
    brazil_deaths.append(deaths_df[deaths_df['Country/Region']=='Brazil'][i].sum())
    india_deaths.append(deaths_df[deaths_df['Country/Region']=='India'][i].sum())
    peru_deaths.append(deaths_df[deaths_df['Country/Region']=='Peru'][i].sum())
    
    china_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='China'][i].sum())
    italy_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Italy'][i].sum())
    us_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='US'][i].sum())
    spain_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Spain'][i].sum())
    france_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='France'][i].sum())
    germany_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Germany'][i].sum())
    uk_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='United Kingdom'][i].sum())
    russia_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Russia'][i].sum())
    brazil_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Brazil'][i].sum())
    india_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='India'][i].sum())
    peru_recoveries.append(recoveries_df[recoveries_df['Country/Region']=='Peru'][i].sum())

Getting daily increases

In [9]:
def daily_increase(data):
    d = [] 
    for i in range(len(data)):
        if i == 0:
            d.append(data[0])
        else:
            d.append(data[i]-data[i-1])
    return d 

# confirmed cases
world_daily_increase = daily_increase(world_cases)
china_daily_increase = daily_increase(china_cases)
italy_daily_increase = daily_increase(italy_cases)
us_daily_increase = daily_increase(us_cases)
spain_daily_increase = daily_increase(spain_cases)
france_daily_increase = daily_increase(france_cases)
germany_daily_increase = daily_increase(germany_cases)
uk_daily_increase = daily_increase(uk_cases)
russia_daily_increase = daily_increase(russia_cases)
brazil_daily_increase = daily_increase(brazil_cases)
india_daily_increase = daily_increase(india_cases)
peru_daily_increase = daily_increase(peru_cases)

# deaths
world_daily_death = daily_increase(total_deaths)
china_daily_death = daily_increase(china_deaths)
italy_daily_death = daily_increase(italy_deaths)
us_daily_death = daily_increase(us_deaths)
spain_daily_death = daily_increase(spain_deaths)
france_daily_death = daily_increase(france_deaths)
germany_daily_death = daily_increase(germany_deaths)
uk_daily_death = daily_increase(uk_deaths)
russia_daily_death = daily_increase(russia_deaths)
brazil_daily_death = daily_increase(brazil_deaths)
india_daily_death = daily_increase(india_deaths)
peru_daily_death = daily_increase(peru_deaths)


# recoveries
world_daily_recovery = daily_increase(total_recovered)
china_daily_recovery = daily_increase(china_recoveries)
italy_daily_recovery = daily_increase(italy_recoveries)
us_daily_recovery = daily_increase(us_recoveries)
spain_daily_recovery = daily_increase(spain_recoveries)
france_daily_recovery = daily_increase(france_recoveries)
germany_daily_recovery = daily_increase(germany_recoveries)
uk_daily_recovery = daily_increase(uk_recoveries)
russia_daily_recovery = daily_increase(russia_recoveries)
brazil_daily_recovery = daily_increase(brazil_recoveries)
india_daily_recovery = daily_increase(india_recoveries)
peru_daily_recovery = daily_increase(peru_recoveries)
In [10]:
days_since_1_22 = np.array([i for i in range(len(dates))]).reshape(-1, 1)
world_cases = np.array(world_cases).reshape(-1, 1)
total_deaths = np.array(total_deaths).reshape(-1, 1)
total_recovered = np.array(total_recovered).reshape(-1, 1)

Future forcasting

In [11]:
days_in_future = 10
future_forcast = np.array([i for i in range(len(dates)+days_in_future)]).reshape(-1, 1)
adjusted_dates = future_forcast[:-10]

Convert integer into datetime for better visualization

In [12]:
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = []
for i in range(len(future_forcast)):
    future_forcast_dates.append((start_date + datetime.timedelta(days=i)).strftime('%m/%d/%Y'))
In [13]:
# slightly modify the data to fit the model better (regression models cannot pick the pattern)
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(days_since_1_22[50:], world_cases[50:], test_size=0.16, shuffle=False) 

Model for predicting # of confirmed cases. I am using support vector machine, bayesian ridge , and linear regression in this example.

In [14]:
# # use this to find the optimal parameters for SVR
# c = [0.01, 0.1, 1]
# gamma = [0.01, 0.1, 1]
# epsilon = [0.01, 0.1, 1]
# shrinking = [True, False]
# degree = [3, 4, 5]

# svm_grid = {'C': c, 'gamma' : gamma, 'epsilon': epsilon, 'shrinking' : shrinking, 'degree': degree}

# svm = SVR(kernel='poly')
# svm_search = RandomizedSearchCV(svm, svm_grid, scoring='neg_mean_squared_error', cv=3, return_train_score=True, n_jobs=-1, n_iter=30, verbose=1)
# svm_search.fit(X_train_confirmed, y_train_confirmed)
In [15]:
# svm_search.best_params_
In [16]:
# svm_confirmed = svm_search.best_estimator_
svm_confirmed = SVR(shrinking=True, kernel='poly',gamma=0.01, epsilon=1,degree=3, C=0.1)
svm_confirmed.fit(X_train_confirmed, y_train_confirmed)
svm_pred = svm_confirmed.predict(future_forcast)
D:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:73: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(**kwargs)
In [17]:
# check against testing data
svm_test_pred = svm_confirmed.predict(X_test_confirmed)
plt.plot(y_test_confirmed)
plt.plot(svm_test_pred)
plt.legend(['Test Data', 'SVM Predictions'])
print('MAE:', mean_absolute_error(svm_test_pred, y_test_confirmed))
print('MSE:',mean_squared_error(svm_test_pred, y_test_confirmed))
MAE: 274062.8881788842
MSE: 77939261329.16754
In [18]:
# transform our data for polynomial regression
poly = PolynomialFeatures(degree=5)
poly_X_train_confirmed = poly.fit_transform(X_train_confirmed)
poly_X_test_confirmed = poly.fit_transform(X_test_confirmed)
poly_future_forcast = poly.fit_transform(future_forcast)

bayesian_poly = PolynomialFeatures(degree=4)
bayesian_poly_X_train_confirmed = bayesian_poly.fit_transform(X_train_confirmed)
bayesian_poly_X_test_confirmed = bayesian_poly.fit_transform(X_test_confirmed)
bayesian_poly_future_forcast = bayesian_poly.fit_transform(future_forcast)
In [19]:
# polynomial regression
linear_model = LinearRegression(normalize=True, fit_intercept=False)
linear_model.fit(poly_X_train_confirmed, y_train_confirmed)
test_linear_pred = linear_model.predict(poly_X_test_confirmed)
linear_pred = linear_model.predict(poly_future_forcast)
print('MAE:', mean_absolute_error(test_linear_pred, y_test_confirmed))
print('MSE:',mean_squared_error(test_linear_pred, y_test_confirmed))
MAE: 272679.22506164876
MSE: 130534604821.71646
In [20]:
print(linear_model.coef_)
[[ 3.10994345e+07 -1.74238370e+06  3.64683492e+04 -3.57157411e+02
   1.70473288e+00 -3.15032281e-03]]
In [21]:
plt.plot(y_test_confirmed)
plt.plot(test_linear_pred)
plt.legend(['Test Data', 'Polynomial Regression Predictions'])
Out[21]:
<matplotlib.legend.Legend at 0x1fcb9985908>
In [22]:
# bayesian ridge polynomial regression
tol = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
alpha_1 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
alpha_2 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
lambda_1 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
lambda_2 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
normalize = [True, False]

bayesian_grid = {'tol': tol, 'alpha_1': alpha_1, 'alpha_2' : alpha_2, 'lambda_1': lambda_1, 'lambda_2' : lambda_2, 
                 'normalize' : normalize}

bayesian = BayesianRidge(fit_intercept=False)
bayesian_search = RandomizedSearchCV(bayesian, bayesian_grid, scoring='neg_mean_squared_error', cv=3, return_train_score=True, n_jobs=-1, n_iter=40, verbose=1)
bayesian_search.fit(bayesian_poly_X_train_confirmed, y_train_confirmed)
Fitting 3 folds for each of 40 candidates, totalling 120 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:    1.8s finished
D:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\validation.py:73: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  return f(**kwargs)
Out[22]:
RandomizedSearchCV(cv=3, estimator=BayesianRidge(fit_intercept=False),
                   n_iter=40, n_jobs=-1,
                   param_distributions={'alpha_1': [1e-07, 1e-06, 1e-05, 0.0001,
                                                    0.001],
                                        'alpha_2': [1e-07, 1e-06, 1e-05, 0.0001,
                                                    0.001],
                                        'lambda_1': [1e-07, 1e-06, 1e-05,
                                                     0.0001, 0.001],
                                        'lambda_2': [1e-07, 1e-06, 1e-05,
                                                     0.0001, 0.001],
                                        'normalize': [True, False],
                                        'tol': [1e-06, 1e-05, 0.0001, 0.001,
                                                0.01]},
                   return_train_score=True, scoring='neg_mean_squared_error',
                   verbose=1)
In [23]:
bayesian_search.best_params_
Out[23]:
{'tol': 1e-06,
 'normalize': False,
 'lambda_2': 0.001,
 'lambda_1': 1e-06,
 'alpha_2': 1e-05,
 'alpha_1': 1e-07}
In [24]:
bayesian_confirmed = bayesian_search.best_estimator_
test_bayesian_pred = bayesian_confirmed.predict(bayesian_poly_X_test_confirmed)
bayesian_pred = bayesian_confirmed.predict(bayesian_poly_future_forcast)
print('MAE:', mean_absolute_error(test_bayesian_pred, y_test_confirmed))
print('MSE:',mean_squared_error(test_bayesian_pred, y_test_confirmed))
MAE: 167374.21756371605
MSE: 48146725658.72198
In [25]:
plt.plot(y_test_confirmed)
plt.plot(test_bayesian_pred)
plt.legend(['Test Data', 'Bayesian Ridge Polynomial Predictions'])
Out[25]:
<matplotlib.legend.Legend at 0x1fcb9a55fc8>

Graphing the number of confirmed cases, active cases, deaths, recoveries, mortality rate (CFR), and recovery rate

In [26]:
adjusted_dates = adjusted_dates.reshape(1, -1)[0]
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, world_cases)
plt.title('# of Coronavirus Cases Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, total_deaths)
plt.title('# of Coronavirus Deaths Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, total_recovered)
plt.title('# of Coronavirus Recoveries Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, total_active)
plt.title('# of Coronavirus Active Cases Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Active Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()
In [27]:
plt.figure(figsize=(16, 9))
plt.bar(adjusted_dates, world_daily_increase)
plt.title('World Daily Increases in Confirmed Cases', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.bar(adjusted_dates, world_daily_death)
plt.title('World Daily Increases in Confirmed Deaths', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.bar(adjusted_dates, world_daily_recovery)
plt.title('World Daily Increases in Confirmed Recoveries', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()
In [28]:
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, np.log10(world_cases))
plt.title('Log of # of Coronavirus Cases Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, np.log10(total_deaths))
plt.title('Log of # of Coronavirus Deaths Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, np.log10(total_recovered))
plt.title('Log of # of Coronavirus Recoveries Over Time', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()
In [29]:
def country_plot(x, y1, y2, y3, y4, country):
    plt.figure(figsize=(16, 9))
    plt.plot(x, y1)
    plt.title('{} Confirmed Cases'.format(country), size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y2)
    plt.title('{} Daily Increases in Confirmed Cases'.format(country), size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y3)
    plt.title('{} Daily Increases in Deaths'.format(country), size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()

    plt.figure(figsize=(16, 9))
    plt.bar(x, y4)
    plt.title('{} Daily Increases in Recoveries'.format(country), size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()

Country Specific Graphs

In [30]:
country_plot(adjusted_dates, china_cases, china_daily_increase, china_daily_death, china_daily_recovery, 'China')
In [31]:
country_plot(adjusted_dates, italy_cases, italy_daily_increase, italy_daily_death, italy_daily_recovery, 'Italy')
In [32]:
country_plot(adjusted_dates, us_cases, us_daily_increase, us_daily_death, us_daily_recovery, 'United States')
In [33]:
country_plot(adjusted_dates, spain_cases, spain_daily_increase, spain_daily_death, spain_daily_recovery, 'Spain')
In [34]:
country_plot(adjusted_dates, france_cases, france_daily_increase, france_daily_death, france_daily_recovery, 'France')
In [35]:
country_plot(adjusted_dates, germany_cases, germany_daily_increase, germany_daily_death, germany_daily_recovery, 'Germany')
In [36]:
country_plot(adjusted_dates, uk_cases, uk_daily_increase, uk_daily_death, uk_daily_recovery, 'UK')
In [37]:
country_plot(adjusted_dates, russia_cases, russia_daily_increase, russia_daily_death, russia_daily_recovery, 'Russia')
In [38]:
country_plot(adjusted_dates, brazil_cases, brazil_daily_increase, brazil_daily_death, brazil_daily_recovery, 'Brazil')
In [39]:
country_plot(adjusted_dates, india_cases, india_daily_increase, india_daily_death, india_daily_recovery, 'India')
In [40]:
country_plot(adjusted_dates, peru_cases, peru_daily_increase, peru_daily_death, peru_daily_recovery, 'Peru')
In [41]:
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, china_cases)
plt.plot(adjusted_dates, italy_cases)
plt.plot(adjusted_dates, us_cases)
plt.plot(adjusted_dates, russia_cases)
plt.plot(adjusted_dates, brazil_cases)
plt.plot(adjusted_dates, india_cases)

plt.title('# of Coronavirus Cases', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.legend(['China', 'Italy', 'US', 'Russia', 'Brazil', 'India'], prop={'size': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, china_deaths)
plt.plot(adjusted_dates, italy_deaths)
plt.plot(adjusted_dates, us_deaths)
plt.plot(adjusted_dates, russia_deaths)
plt.plot(adjusted_dates, brazil_deaths)
plt.plot(adjusted_dates, india_deaths)

plt.title('# of Coronavirus Deaths', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.legend(['China', 'Italy', 'US', 'Russia', 'Brazil', 'India'], prop={'size': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, china_recoveries)
plt.plot(adjusted_dates, italy_recoveries)
plt.plot(adjusted_dates, us_recoveries)
plt.plot(adjusted_dates, russia_recoveries)
plt.plot(adjusted_dates, brazil_recoveries)
plt.plot(adjusted_dates, india_recoveries)

plt.title('# of Coronavirus Recoveries', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.legend(['China', 'Italy', 'US', 'Russia', 'Brazil', 'India'], prop={'size': 20})
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()
In [42]:
def plot_predictions(x, y, pred, algo_name, color):
    plt.figure(figsize=(16, 9))
    plt.plot(x, y)
    plt.plot(future_forcast, pred, linestyle='dashed', color=color)
    plt.title('# of Coronavirus Cases Over Time', size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.legend(['Confirmed Cases', algo_name], prop={'size': 20})
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()

Predictions for confirmed coronavirus cases worldwide

In [43]:
plot_predictions(adjusted_dates, world_cases, svm_pred, 'SVM Predictions', 'purple')
In [44]:
plot_predictions(adjusted_dates, world_cases, linear_pred, 'Polynomial Regression Predictions', 'orange')
In [45]:
plot_predictions(adjusted_dates, world_cases, bayesian_pred, 'Bayesian Ridge Regression Predictions', 'green')
In [46]:
# Future predictions using SVM 
svm_df = pd.DataFrame({'Date': future_forcast_dates[-10:], 'SVM Predicted # of Confirmed Cases Worldwide': np.round(svm_pred[-10:])})
svm_df
Out[46]:
Date SVM Predicted # of Confirmed Cases Worldwide
0 07/03/2020 11389062.0
1 07/04/2020 11587423.0
2 07/05/2020 11788217.0
3 07/06/2020 11991460.0
4 07/07/2020 12197166.0
5 07/08/2020 12405351.0
6 07/09/2020 12616028.0
7 07/10/2020 12829214.0
8 07/11/2020 13044923.0
9 07/12/2020 13263170.0
In [47]:
# Future predictions using polynomial regression
linear_pred = linear_pred.reshape(1,-1)[0]
svm_df = pd.DataFrame({'Date': future_forcast_dates[-10:], 'Polynomial Predicted # of Confirmed Cases Worldwide': np.round(linear_pred[-10:])})
svm_df
Out[47]:
Date Polynomial Predicted # of Confirmed Cases Worldwide
0 07/03/2020 10163526.0
1 07/04/2020 10250757.0
2 07/05/2020 10331768.0
3 07/06/2020 10406031.0
4 07/07/2020 10472997.0
5 07/08/2020 10532096.0
6 07/09/2020 10582734.0
7 07/10/2020 10624296.0
8 07/11/2020 10656142.0
9 07/12/2020 10677611.0
In [48]:
# Future predictions using Bayesian Ridge 
svm_df = pd.DataFrame({'Date': future_forcast_dates[-10:], 'Bayesian Ridge Predicted # of Confirmed Cases Worldwide': np.round(bayesian_pred[-10:])})
svm_df
Out[48]:
Date Bayesian Ridge Predicted # of Confirmed Cases Worldwide
0 07/03/2020 10530666.0
1 07/04/2020 10673344.0
2 07/05/2020 10816486.0
3 07/06/2020 10960070.0
4 07/07/2020 11104076.0
5 07/08/2020 11248481.0
6 07/09/2020 11393266.0
7 07/10/2020 11538407.0
8 07/11/2020 11683882.0
9 07/12/2020 11829669.0

Mortality Rate (worldwide) susceptible to change

In [49]:
mean_mortality_rate = np.mean(mortality_rate)
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, mortality_rate, color='orange')
plt.axhline(y = mean_mortality_rate,linestyle='--', color='black')
plt.title('Mortality Rate of Coronavirus Over Time', size=30)
plt.legend(['mortality rate', 'y='+str(mean_mortality_rate)], prop={'size': 20})
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('Mortality Rate', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

Recovery Rate (worldwide) suceptible to change

In [50]:
mean_recovery_rate = np.mean(recovery_rate)
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, recovery_rate, color='blue')
plt.axhline(y = mean_recovery_rate,linestyle='--', color='black')
plt.title('Recovery Rate of Coronavirus Over Time', size=30)
plt.legend(['recovery rate', 'y='+str(mean_recovery_rate)], prop={'size': 20})
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('Recovery Rate', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

Graphing deaths against recoveries

In [51]:
plt.figure(figsize=(16, 9))
plt.plot(adjusted_dates, total_deaths, color='r')
plt.plot(adjusted_dates, total_recovered, color='green')
plt.legend(['death', 'recoveries'], loc='best', fontsize=20)
plt.title('# of Coronavirus Cases', size=30)
plt.xlabel('Days Since 1/22/2020', size=30)
plt.ylabel('# of Cases', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

Plotting the number of deaths against the number of recoveries

In [52]:
plt.figure(figsize=(16, 9))
plt.plot(total_recovered, total_deaths)
plt.title('# of Coronavirus Deaths vs. # of Coronavirus Recoveries', size=30)
plt.xlabel('# of Coronavirus Recoveries', size=30)
plt.ylabel('# of Coronavirus Deaths', size=30)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

Getting information about countries/regions that have confirmed coronavirus cases

In [53]:
unique_countries =  list(latest_data['Country_Region'].unique())
In [54]:
country_confirmed_cases = []
country_death_cases = [] 
country_active_cases = []
country_recovery_cases = []
country_mortality_rate = [] 

no_cases = []
for i in unique_countries:
    cases = latest_data[latest_data['Country_Region']==i]['Confirmed'].sum()
    if cases > 0:
        country_confirmed_cases.append(cases)
    else:
        no_cases.append(i)
        
for i in no_cases:
    unique_countries.remove(i)
    
# sort countries by the number of confirmed cases
unique_countries = [k for k, v in sorted(zip(unique_countries, country_confirmed_cases), key=operator.itemgetter(1), reverse=True)]
for i in range(len(unique_countries)):
    country_confirmed_cases[i] = latest_data[latest_data['Country_Region']==unique_countries[i]]['Confirmed'].sum()
    country_death_cases.append(latest_data[latest_data['Country_Region']==unique_countries[i]]['Deaths'].sum())
    country_recovery_cases.append(latest_data[latest_data['Country_Region']==unique_countries[i]]['Recovered'].sum())
    country_active_cases.append(country_confirmed_cases[i] - country_death_cases[i] - country_recovery_cases[i])
    country_mortality_rate.append(country_death_cases[i]/country_confirmed_cases[i])

Data table

In [55]:
country_df = pd.DataFrame({'Country Name': unique_countries, 'Number of Confirmed Cases': country_confirmed_cases,
                          'Number of Deaths': country_death_cases, 'Number of Recoveries' : country_recovery_cases, 
                          'Number of Active Cases' : country_active_cases,
                          'Mortality Rate': country_mortality_rate})
# number of cases per country/region

country_df.style.background_gradient(cmap='Greens')
Out[55]:
Country Name Number of Confirmed Cases Number of Deaths Number of Recoveries Number of Active Cases Mortality Rate
0 US 2739879 128740 781970 1829169 0.046987
1 Brazil 1496858 61884 957692 477282 0.041343
2 Russia 660231 9668 428276 222287 0.014643
3 India 625544 18213 379892 227439 0.029115
4 Peru 292004 10045 182097 99862 0.034400
5 United Kingdom 285268 44080 1373 239815 0.154521
6 Chile 284541 5920 249247 29374 0.020805
7 Spain 250103 28368 150376 71359 0.113425
8 Italy 240961 34818 191083 15060 0.144496
9 Mexico 238511 29189 183757 25565 0.122380
10 Iran 232863 11106 194098 27659 0.047693
11 Pakistan 221896 4551 113623 103722 0.020510
12 France 203640 29878 76927 96835 0.146720
13 Turkey 202284 5167 176965 20152 0.025543
14 Saudi Arabia 197608 1752 137669 58187 0.008866
15 Germany 196370 9006 179800 7564 0.045862
16 South Africa 168061 2844 81999 83218 0.016922
17 Bangladesh 153277 1926 66442 84909 0.012565
18 Canada 106643 8700 69872 28071 0.081581
19 Colombia 102261 3650 43481 55130 0.035693
20 Qatar 97897 118 86597 11182 0.001205
21 China 84830 4641 79665 524 0.054709
22 Egypt 71299 3120 19288 48891 0.043759
23 Sweden 70639 5411 0 65228 0.076601
24 Argentina 69941 1385 24186 44370 0.019802
25 Belarus 62698 405 48738 13555 0.006460
26 Belgium 61598 9761 17044 34793 0.158463
27 Ecuador 59468 4639 28032 26797 0.078008
28 Indonesia 59394 2987 26667 29740 0.050291
29 Iraq 53708 2160 27912 23636 0.040217
30 Netherlands 50546 6132 186 44228 0.121315
31 United Arab Emirates 49469 317 38664 10488 0.006408
32 Kuwait 47859 359 38390 9110 0.007501
33 Ukraine 46821 1200 20755 24866 0.025630
34 Singapore 44310 26 39429 4855 0.000587
35 Portugal 42782 1587 28097 13098 0.037095
36 Kazakhstan 42574 188 14777 27609 0.004416
37 Oman 42555 188 25318 17049 0.004418
38 Philippines 38805 1274 10673 26858 0.032831
39 Bolivia 35528 1271 10358 23899 0.035775
40 Panama 35237 667 16445 18125 0.018929
41 Poland 35146 1492 22209 11445 0.042451
42 Dominican Republic 34197 765 18141 15291 0.022370
43 Afghanistan 32022 807 16041 15174 0.025201
44 Switzerland 31967 1965 29200 802 0.061470
45 Bahrain 27837 94 22583 5160 0.003377
46 Romania 27746 1687 19363 6696 0.060802
47 Nigeria 27110 616 10801 15693 0.022722
48 Israel 27047 324 17547 9176 0.011979
49 Armenia 26658 459 15036 11163 0.017218
50 Ireland 25489 1738 23364 387 0.068186
51 Honduras 21120 591 2190 18339 0.027983
52 Guatemala 20072 843 3279 15950 0.041999
53 Japan 19055 977 16615 1463 0.051273
54 Azerbaijan 18684 228 10425 8031 0.012203
55 Ghana 18134 117 13550 4467 0.006452
56 Austria 17941 705 16514 722 0.039295
57 Moldova 17150 560 9846 6744 0.032653
58 Serbia 15195 287 12912 1996 0.018888
59 Algeria 14657 928 10342 3387 0.063314
60 Nepal 14519 31 5320 9168 0.002135
61 Denmark 13015 606 11969 440 0.046562
62 Morocco 12969 229 9090 3650 0.017657
63 Korea, South 12967 282 11759 926 0.021748
64 Cameroon 12592 313 10100 2179 0.024857
65 Czechia 12178 353 7822 4003 0.028987
66 Cote d'Ivoire 9992 68 4660 5264 0.006805
67 Sudan 9573 602 4606 4365 0.062885
68 Uzbekistan 9078 27 6034 3017 0.002974
69 Norway 8902 251 8138 513 0.028196
70 Malaysia 8643 121 8437 85 0.014000
71 Australia 8066 104 7130 832 0.012894
72 Finland 7241 328 6700 213 0.045298
73 Congo (Kinshasa) 7189 176 2317 4696 0.024482
74 Senegal 7054 121 4599 2334 0.017153
75 El Salvador 7000 191 4115 2694 0.027286
76 Kenya 6941 152 2109 4680 0.021899
77 Kyrgyzstan 6767 76 2655 4036 0.011231
78 North Macedonia 6625 321 2748 3556 0.048453
79 Venezuela 6273 57 2100 4116 0.009087
80 Haiti 6101 110 1141 4850 0.018030
81 Tajikistan 6058 52 4690 1316 0.008584
82 Ethiopia 5846 103 2430 3313 0.017619
83 Gabon 5513 42 2508 2963 0.007618
84 Guinea 5450 33 4392 1025 0.006055
85 Bulgaria 5315 232 2802 2281 0.043650
86 Bosnia and Herzegovina 4788 189 2515 2084 0.039474
87 Djibouti 4715 55 4564 96 0.011665
88 Mauritania 4606 129 1727 2750 0.028007
89 Luxembourg 4395 110 4012 273 0.025028
90 Hungary 4166 587 2721 858 0.140903
91 Costa Rica 4023 18 1589 2416 0.004474
92 Central African Republic 3788 47 810 2931 0.012408
93 Greece 3458 192 1374 1892 0.055523
94 Thailand 3179 58 3059 62 0.018245
95 West Bank and Gaza 3080 9 460 2611 0.002922
96 Equatorial Guinea 3071 51 842 2178 0.016607
97 Kosovo 3064 55 1707 1302 0.017950
98 Somalia 2944 90 951 1903 0.030571
99 Croatia 2912 110 2155 647 0.037775
100 Albania 2662 69 1559 1034 0.025920
101 Nicaragua 2519 83 1238 1198 0.032950
102 Madagascar 2403 24 1040 1339 0.009988
103 Maldives 2400 10 1969 421 0.004167
104 Cuba 2353 86 2221 46 0.036549
105 Paraguay 2303 19 1108 1176 0.008250
106 Mali 2260 117 1502 641 0.051770
107 Sri Lanka 2066 11 1827 228 0.005324
108 South Sudan 2021 38 333 1650 0.018803
109 Estonia 1990 69 1842 79 0.034673
110 Iceland 1850 10 1828 12 0.005405
111 Lithuania 1825 78 1536 211 0.042740
112 Lebanon 1796 35 1242 519 0.019488
113 Slovakia 1700 28 1466 206 0.016471
114 Guinea-Bissau 1654 24 317 1313 0.014510
115 Slovenia 1634 111 1384 139 0.067931
116 Zambia 1632 30 1348 254 0.018382
117 New Zealand 1530 22 1490 18 0.014379
118 Sierra Leone 1518 60 1007 451 0.039526
119 Congo (Brazzaville) 1382 41 486 855 0.029667
120 Malawi 1342 16 271 1055 0.011923
121 Cabo Verde 1301 15 629 657 0.011530
122 Yemen 1221 325 513 383 0.266175
123 Benin 1199 21 333 845 0.017515
124 Tunisia 1178 50 1039 89 0.042445
125 Jordan 1136 9 889 238 0.007923
126 Latvia 1122 30 988 104 0.026738
127 Niger 1081 68 959 54 0.062905
128 Rwanda 1063 3 493 567 0.002822
129 Cyprus 999 19 833 147 0.019019
130 Burkina Faso 967 53 846 68 0.054809
131 Uruguay 947 28 828 91 0.029567
132 Georgia 939 15 817 107 0.015974
133 Mozambique 918 6 249 663 0.006536
134 Uganda 902 0 847 55 0.000000
135 Libya 891 26 224 641 0.029181
136 Eswatini 873 11 452 410 0.012600
137 Chad 868 74 785 9 0.085253
138 Andorra 855 52 800 3 0.060819
139 Liberia 819 37 338 444 0.045177
140 Sao Tome and Principe 717 13 260 444 0.018131
141 Jamaica 715 10 560 145 0.013986
142 Diamond Princess 712 13 651 48 0.018258
143 San Marino 698 42 656 0 0.060172
144 Malta 671 9 649 13 0.013413
145 Togo 667 14 424 229 0.020990
146 Zimbabwe 617 7 173 437 0.011345
147 Montenegro 616 12 315 289 0.019481
148 Suriname 547 13 252 282 0.023766
149 Tanzania 509 21 183 305 0.041257
150 Taiwan* 448 7 438 3 0.015625
151 Vietnam 355 0 340 15 0.000000
152 Mauritius 341 10 330 1 0.029326
153 Angola 315 17 97 201 0.053968
154 Syria 312 9 113 190 0.028846
155 Burma 304 6 223 75 0.019737
156 Comoros 303 7 200 96 0.023102
157 Namibia 293 0 24 269 0.000000
158 Guyana 250 14 117 119 0.056000
159 Botswana 227 1 28 198 0.004405
160 Mongolia 220 0 179 41 0.000000
161 Eritrea 215 0 56 159 0.000000
162 Burundi 170 1 115 54 0.005882
163 Brunei 141 3 138 0 0.021277
164 Cambodia 141 0 131 10 0.000000
165 Trinidad and Tobago 130 8 115 7 0.061538
166 Monaco 106 4 95 7 0.037736
167 Bahamas 104 11 89 4 0.105769
168 Barbados 97 7 90 0 0.072165
169 Liechtenstein 83 1 81 1 0.012048
170 Seychelles 81 0 11 70 0.000000
171 Bhutan 77 0 50 27 0.000000
172 Antigua and Barbuda 69 3 23 43 0.043478
173 Gambia 55 2 27 26 0.036364
174 Lesotho 35 0 11 24 0.000000
175 Saint Vincent and the Grenadines 29 0 29 0 0.000000
176 Belize 28 2 18 8 0.071429
177 Timor-Leste 24 0 24 0 0.000000
178 Grenada 23 0 23 0 0.000000
179 Laos 19 0 19 0 0.000000
180 Saint Lucia 19 0 19 0 0.000000
181 Dominica 18 0 18 0 0.000000
182 Fiji 18 0 18 0 0.000000
183 Saint Kitts and Nevis 15 0 15 0 0.000000
184 Holy See 12 0 12 0 0.000000
185 Papua New Guinea 11 0 8 3 0.000000
186 Western Sahara 10 1 8 1 0.100000
187 MS Zaandam 9 2 0 7 0.222222
In [56]:
unique_provinces =  list(latest_data['Province_State'].unique())

Getting the latest information about provinces/states that have confirmed coronavirus cases

In [57]:
province_confirmed_cases = []
province_country = [] 
province_death_cases = [] 
province_recovery_cases = []
province_mortality_rate = [] 

no_cases = [] 
for i in unique_provinces:
    cases = latest_data[latest_data['Province_State']==i]['Confirmed'].sum()
    if cases > 0:
        province_confirmed_cases.append(cases)
    else:
        no_cases.append(i)
 
# remove areas with no confirmed cases
for i in no_cases:
    unique_provinces.remove(i)
    
unique_provinces = [k for k, v in sorted(zip(unique_provinces, province_confirmed_cases), key=operator.itemgetter(1), reverse=True)]
for i in range(len(unique_provinces)):
    province_confirmed_cases[i] = latest_data[latest_data['Province_State']==unique_provinces[i]]['Confirmed'].sum()
    province_country.append(latest_data[latest_data['Province_State']==unique_provinces[i]]['Country_Region'].unique()[0])
    province_death_cases.append(latest_data[latest_data['Province_State']==unique_provinces[i]]['Deaths'].sum())
    province_recovery_cases.append(latest_data[latest_data['Province_State']==unique_provinces[i]]['Recovered'].sum())
    province_mortality_rate.append(province_death_cases[i]/province_confirmed_cases[i])
In [58]:
# number of cases per province/state/city
province_df = pd.DataFrame({'Province/State Name': unique_provinces, 'Country': province_country, 'Number of Confirmed Cases': province_confirmed_cases,
                          'Number of Deaths': province_death_cases, 'Number of Recoveries' : province_recovery_cases,
                          'Mortality Rate': province_mortality_rate})
# number of cases per country/region

province_df.style.background_gradient(cmap='Greens')
Out[58]:
Province/State Name Country Number of Confirmed Cases Number of Deaths Number of Recoveries Mortality Rate
0 New York US 394954 32064 0 0.081184
1 Sao Paulo Brazil 302179 15351 162851 0.050801
2 California US 246550 6265 0 0.025411
3 England United Kingdom 243910 39434 0 0.161674
4 Moscow Russia 222871 3870 153248 0.017364
5 Metropolitana Chile 221820 5055 199125 0.022789
6 Maharashtra India 186626 8178 101172 0.043820
7 Texas US 179137 2542 0 0.014190
8 New Jersey US 172356 15107 0 0.087650
9 Florida US 169106 3617 0 0.021389
10 Lima Peru 160901 4652 0 0.028912
11 Illinois US 144013 6951 0 0.048266
12 Rio de Janeiro Brazil 116823 10332 95028 0.088441
13 Ceara Brazil 115524 6284 90085 0.054396
14 Massachusetts US 109338 8132 0 0.074375
15 Para Brazil 108067 5004 95331 0.046305
16 Tamil Nadu India 98392 1321 56021 0.013426
17 Lombardia Italy 94108 16671 67610 0.177148
18 Pennsylvania US 92612 6712 0 0.072474
19 Delhi India 92175 2864 63007 0.031071
20 Sindh Pakistan 89225 1437 49926 0.016105
21 Georgia US 87709 2849 0 0.032482
22 Arizona US 87445 1764 0 0.020173
23 Maranhao Brazil 86025 2119 63156 0.024632
24 Punjab India 84740 1971 37930 0.023259
25 Bahia Brazil 79349 1947 53334 0.024537
26 Amazonas Brazil 78178 3037 61834 0.038847
27 Madrid Spain 72096 8428 40736 0.116900
28 Michigan US 71678 6212 0 0.086665
29 Maryland US 68423 3212 0 0.046943
30 North Carolina US 68216 1409 0 0.020655
31 Hubei China 68135 4512 63623 0.066221
32 Virginia US 63735 1816 0 0.028493
33 Catalonia Spain 61888 5673 26203 0.091666
34 Louisiana US 61561 3255 0 0.052874
35 Pernambuco Brazil 61119 4968 41925 0.081284
36 Moscow Oblast Russia 58193 921 36802 0.015827
37 Quebec Canada 55593 5541 25034 0.099671
38 Ohio US 52865 2876 0 0.054403
39 Distrito Federal Brazil 52281 631 38901 0.012069
40 Minas Gerais Brazil 50707 1059 30469 0.020885
41 Espirito Santo Brazil 50240 1728 30332 0.034395
42 Ciudad de Mexico Mexico 49573 6750 39104 0.136163
43 Paraiba Brazil 49536 1044 16349 0.021076
44 Bayern Germany 48547 2597 45210 0.053495
45 Tennessee US 46890 620 0 0.013222
46 Connecticut US 46646 4326 0 0.092741
47 Indiana US 46387 2662 0 0.057387
48 Nordrhein-Westfalen Germany 43436 1684 37853 0.038770
49 Alabama US 40111 985 0 0.024557
50 South Carolina US 39701 784 0 0.019748
51 Alagoas Brazil 37328 1091 29185 0.029227
52 Ontario Canada 37242 2737 32255 0.073492
53 Minnesota US 37210 1495 0 0.040177
54 Baden-Wurttemberg Germany 35780 1839 33518 0.051397
55 Mexico Mexico 35631 4514 28901 0.126687
56 Washington US 34151 1342 0 0.039296
57 Gujarat India 33913 1886 24593 0.055613
58 Colorado US 33335 1701 0 0.051027
59 Rio Grande do Norte Brazil 32897 1103 2904 0.033529
60 Capital District Colombia 31417 735 13065 0.023395
61 Piemonte Italy 31378 4096 25983 0.130537
62 Iowa US 30259 719 0 0.023762
63 Wisconsin US 29738 793 0 0.026666
64 Rio Grande do Sul Brazil 29195 663 23511 0.022709
65 Amapa Brazil 29153 427 16298 0.014647
66 Mississippi US 28770 1092 0 0.037956
67 Santa Catarina Brazil 28575 362 22864 0.012668
68 Emilia-Romagna Italy 28535 4265 23267 0.149466
69 Khyber Pakhtunkhwa Pakistan 27170 983 14715 0.036180
70 Sergipe Brazil 26612 725 14878 0.027243
71 Parana Brazil 26304 706 6613 0.026840
72 Goias Brazil 26263 545 0 0.020752
73 Uttar Pradesh India 24825 735 17221 0.029607
74 Saint Petersburg Russia 24727 1227 19004 0.049622
75 Atlantico Colombia 24212 1277 8149 0.052742
76 Piaui Brazil 23307 697 21717 0.029905
77 Utah US 23270 176 0 0.007563
78 Missouri US 22636 1044 0 0.046121
79 Arkansas US 22075 279 0 0.012639
80 Rondonia Brazil 21970 530 11302 0.024124
81 Stockholm Sweden 20759 2337 0 0.112578
82 West Bengal India 19819 699 13037 0.035269
83 Castilla y Leon Spain 19750 2787 8716 0.141114
84 Nevada US 19733 525 0 0.026605
85 Nebraska US 19452 282 0 0.014497
86 Veneto Italy 19309 2022 16890 0.104718
87 Rajasthan India 18662 430 14948 0.023041
88 Telangana India 18570 275 9069 0.014809
89 Scotland United Kingdom 18264 2486 0 0.136115
90 Callao Peru 18230 643 0 0.035272
91 Castilla - La Mancha Spain 18187 3026 6392 0.166383
92 Karnataka India 18016 272 8334 0.015098
93 Nizhny Novgorod Oblast Russia 17943 249 11589 0.013877
94 Mato Grosso Brazil 17727 674 6543 0.038021
95 Roraima Brazil 17583 354 4420 0.020133
96 Piura Peru 17336 769 0 0.044359
97 Rhode Island US 16941 959 0 0.056608
98 Andhra Pradesh India 16097 198 7313 0.012300
99 Kentucky US 16079 581 0 0.036134
100 Wales United Kingdom 15815 1524 0 0.096364
101 Vastra Gotaland Sweden 15628 762 0 0.048759
102 Haryana India 15509 251 11019 0.016184
103 Kansas US 15002 280 0 0.018664
104 Oklahoma US 14531 395 0 0.027183
105 Madhya Pradesh India 14106 589 10815 0.041755
106 Acre Brazil 14048 378 7650 0.026908
107 Lambayeque Peru 13904 756 0 0.054373
108 Pais Vasco Spain 13798 1560 16160 0.113060
109 Niedersachsen Germany 13605 634 12265 0.046601
110 Sverdlovsk Oblast Russia 13272 107 7737 0.008062
111 Islamabad Pakistan 13195 129 8264 0.009776
112 Andalusia Spain 13123 1428 10671 0.108817
113 New Mexico US 12520 503 0 0.040176
114 Valparaiso Chile 12201 252 9903 0.020654
115 Delaware US 11731 510 0 0.043475
116 C. Valenciana Spain 11616 1432 9970 0.123278
117 Tocantins Brazil 11454 209 7226 0.018247
118 Tabasco Mexico 11165 1097 8960 0.098253
119 Puebla Mexico 11161 1406 8336 0.125974
120 Hessen Germany 10869 508 9952 0.046738
121 Veracruz Mexico 10671 1635 7934 0.153219
122 Balochistan Pakistan 10666 122 5073 0.011438
123 Valle del Cauca Colombia 10508 422 4771 0.040160
124 Bihar India 10471 77 8020 0.007354
125 District of Columbia US 10390 554 0 0.053321
126 La Libertad Peru 10333 615 0 0.059518
127 Toscana Italy 10258 1109 8825 0.108111
128 Liguria Italy 9984 1558 8145 0.156050
129 Rostov Oblast Russia 9796 130 6173 0.013271
130 Khanty-Mansi Autonomous Okrug Russia 9485 59 4762 0.006220
131 Baja California Mexico 9423 2007 6775 0.212989
132 Bolivar Colombia 9392 388 4640 0.041312
133 Oregon US 9294 209 0 0.022488
134 Antofagasta Chile 9285 163 7375 0.017555
135 Galicia Spain 9239 619 9204 0.066999
136 Krasnoyarsk Krai Russia 9181 137 4517 0.014922
137 Loreto Peru 9162 347 0 0.037874
138 Mato Grosso do Sul Brazil 9062 91 4937 0.010042
139 Assam India 9013 12 6106 0.001331
140 Sinaloa Mexico 8613 1356 6829 0.157436
141 Berlin Germany 8344 214 7515 0.025647
142 Sonora Mexico 8337 773 6941 0.092719
143 Ancash Peru 8263 487 0 0.058937
144 Alberta Canada 8202 155 7505 0.018898
145 Lazio Italy 8130 839 6460 0.103198
146 Guanajuato Mexico 8062 390 6014 0.048375
147 Dagestan Republic Russia 7968 383 5902 0.048067
148 Ucayali Peru 7937 156 0 0.019655
149 Voronezh Oblast Russia 7908 41 4338 0.005185
150 Ica Peru 7850 497 0 0.063312
151 Jammu and Kashmir India 7849 115 4974 0.014652
152 Irkutsk Oblast Russia 7776 66 2927 0.008488
153 Puerto Rico US 7608 153 0 0.020110
154 Odisha India 7545 27 5502 0.003579
155 Chelyabinsk Oblast Russia 7473 88 3830 0.011776
156 Arequipa Peru 7425 280 0 0.037710
157 Unknown Chile 7176 0 183113 0.000000
158 Rheinland-Pfalz Germany 7036 235 6603 0.033400
159 Jalisco Mexico 7030 762 5332 0.108393
160 Biobio Chile 6951 58 5196 0.008344
161 South Dakota US 6893 97 0 0.014072
162 Tula Oblast Russia 6836 114 5805 0.016676
163 Marche Italy 6789 987 5564 0.145382
164 Nuevo Leon Mexico 6656 361 4791 0.054237
165 Saratov Oblast Russia 6645 44 3569 0.006622
166 Idaho US 6593 92 0 0.013954
167 Tamaulipas Mexico 6553 399 5103 0.060888
168 Tokyo Japan 6529 325 5474 0.049778
169 Volgograd Oblast Russia 6508 41 3057 0.006300
170 OHiggins Chile 6445 77 4447 0.011947
171 Aragon Spain 6380 912 3772 0.142947
172 Bryansk Oblast Russia 6325 34 4541 0.005375
173 Tarapaca Chile 6215 95 5380 0.015286
174 Novosibirsk Oblast Russia 6177 101 3714 0.016351
175 Kaluga Oblast Russia 6117 44 3690 0.007193
176 Krasnodar Krai Russia 6102 79 4827 0.012947
177 Michoacan Mexico 5996 479 5059 0.079887
178 Ulyanovsk Oblast Russia 5993 43 3208 0.007175
179 Yamalo-Nenets Autonomous Okrug Russia 5980 40 2462 0.006689
180 New Hampshire US 5822 375 0 0.064411
181 Maule Chile 5820 53 4504 0.009107
182 Arkhangelsk Oblast Russia 5790 61 2989 0.010535
183 Northern Ireland United Kingdom 5768 551 0 0.095527
184 Lviv Oblast Ukraine 5730 146 856 0.025480
185 Oaxaca Mexico 5671 601 4494 0.105978
186 Guerrero Mexico 5652 913 4268 0.161536
187 Murmansk Oblast Russia 5485 16 3609 0.002917
188 Navarra Spain 5476 528 3905 0.096421
189 Sachsen Germany 5457 224 5165 0.041048
190 Bashkortostan Republic Russia 5450 18 3144 0.003303
191 Stavropol Krai Russia 5318 99 3081 0.018616
192 San Martin Peru 5316 154 0 0.028969
193 Ryazan Oblast Russia 5277 19 3671 0.003601
194 Kiev Ukraine 5242 111 1642 0.021175
195 Coahuila Mexico 5213 240 3718 0.046039
196 Hamburg Germany 5211 260 4900 0.049894
197 Chuvashia Republic Russia 5194 33 2940 0.006353
198 Kursk Oblast Russia 5193 37 3195 0.007125
199 Penza Oblast Russia 5118 43 4869 0.008402
200 Kabardino-Balkarian Republic Russia 5109 54 3782 0.010570
201 Samara Oblast Russia 4973 56 2834 0.011261
202 Chernivtsi Oblast Ukraine 4894 183 1970 0.037393
203 Yaroslavl Oblast Russia 4866 19 3420 0.003905
204 P.A. Trento Italy 4865 405 4414 0.083248
205 Ivanovo Oblast Russia 4829 45 2973 0.009319
206 Tambov Oblast Russia 4816 19 2961 0.003945
207 Kerala India 4753 25 2640 0.005260
208 Tyva Republic Russia 4717 3 1323 0.000636
209 Campania Italy 4702 432 4080 0.091876
210 Antioquia Colombia 4660 30 1921 0.006438
211 Smolensk Oblast Russia 4659 66 2450 0.014166
212 Chiapas Mexico 4649 584 3712 0.125618
213 Belgorod Oblast Russia 4637 42 3663 0.009058
214 Tatarstan Republic Russia 4634 12 3720 0.002590
215 Yucatan Mexico 4601 466 3063 0.101282
216 Orel Oblast Russia 4579 66 2911 0.014414
217 Leningrad Oblast Russia 4530 32 4185 0.007064
218 Puglia Italy 4530 545 3876 0.120309
219 Primorsky Krai Russia 4458 48 2904 0.010767
220 Vladimir Oblast Russia 4456 65 2857 0.014587
221 French Guiana France 4444 16 1680 0.003600
222 Khabarovsk Krai Russia 4386 29 2200 0.006612
223 Sakha (Yakutiya) Republic Russia 4315 32 3084 0.007416
224 Altai Krai Russia 4134 38 2991 0.009192
225 Jonkoping Sweden 4102 169 0 0.041199
226 Hidalgo Mexico 4079 698 2896 0.171120
227 La Rioja Spain 4078 366 3107 0.089750
228 Junin Peru 4058 146 0 0.035978
229 North Ossetia - Alania Republic Russia 4049 67 3550 0.016547
230 Quintana Roo Mexico 3947 605 2539 0.153281
231 Orenburg Oblast Russia 3929 20 2939 0.005090
232 Rivne Oblast Ukraine 3877 70 1875 0.018055
233 Lipetsk Oblast Russia 3852 17 2414 0.004413
234 Perm Krai Russia 3801 78 2800 0.020521
235 Omsk Oblast Russia 3780 30 2028 0.007937
236 Tver Oblast Russia 3712 72 2618 0.019397
237 Karachay-Cherkess Republic Russia 3668 15 1597 0.004089
238 North Dakota US 3657 80 0 0.021876
239 Astrakhan Oblast Russia 3548 32 2363 0.009019
240 Narino Colombia 3521 119 2102 0.033797
241 Komi Republic Russia 3513 33 1782 0.009394
242 Brandenburg Germany 3470 172 3200 0.049568
243 Mordovia Republic Russia 3467 27 2303 0.007788
244 Uppsala Sweden 3330 226 0 0.067868
245 Maine US 3328 105 0 0.031550
246 San Luis Potosi Mexico 3324 161 2449 0.048436
247 Friuli Venezia Giulia Italy 3314 345 2907 0.104104
248 Abruzzo Italy 3292 464 2668 0.140948
249 Ostergotland Sweden 3278 224 0 0.068334
250 Kirov Oblast Russia 3276 38 2711 0.011600
251 Araucania Chile 3267 41 2983 0.012550
252 Thuringen Germany 3260 181 3050 0.055521
253 Tyumen Oblast Russia 3231 18 1942 0.005571
254 Zabaykalsky Krai Russia 3212 42 2152 0.013076
255 Chihuahua Mexico 3211 653 2405 0.203363
256 New South Wales Australia 3211 49 2789 0.015260
257 Schleswig-Holstein Germany 3176 152 3000 0.047859
258 Coquimbo Chile 3170 24 2507 0.007571
259 Skane Sweden 3095 246 0 0.079483
260 Sicilia Italy 3090 282 2674 0.091262
261 Ingushetia Republic Russia 3088 69 2474 0.022345
262 West Virginia US 3053 93 0 0.030462
263 Zakarpattia Oblast Ukraine 3046 97 975 0.031845
264 Extremadura Spain 3040 519 2652 0.170724
265 Morelos Mexico 3032 683 2136 0.225264
266 Mari El Republic Russia 3020 23 2835 0.007616
267 Chhattisgarh India 3013 14 2385 0.004647
268 Buryatia Republic Russia 2991 19 2059 0.006352
269 Uttarakhand India 2984 42 2405 0.014075
270 Novgorod Oblast Russia 2978 26 847 0.008731
271 Cundinamarca Colombia 2975 66 1516 0.022185
272 British Columbia Canada 2940 177 2603 0.060204
273 Tumbes Peru 2883 125 0 0.043358
274 Pskov Oblast Russia 2875 23 1091 0.008000
275 Saarland Germany 2807 174 2584 0.061988
276 Kiev Oblast Ukraine 2655 51 1398 0.019209
277 Mayotte France 2650 35 2341 0.013208
278 P.A. Bolzano Italy 2642 292 2266 0.110522
279 Tlaxcala Mexico 2591 367 1949 0.141644
280 Jharkhand India 2584 15 1983 0.005805
281 Huanuco Peru 2560 69 0 0.026953
282 Gavleborg Sweden 2531 133 0 0.052548
283 Volyn Oblast Ukraine 2508 62 744 0.024721
284 Ivano-Frankivsk Oblast Ukraine 2489 102 1108 0.040980
285 Kaliningrad Oblast Russia 2472 38 1538 0.015372
286 Nuble Chile 2447 33 2136 0.013486
287 Asturias Spain 2435 334 1063 0.137166
288 Canarias Spain 2431 162 1537 0.066639
289 Aguascalientes Mexico 2399 154 1933 0.064193
290 Victoria Australia 2368 20 1904 0.008446
291 Cantabria Spain 2364 216 2287 0.091371
292 Tomsk Oblast Russia 2364 17 1189 0.007191
293 Orebro Sweden 2363 164 0 0.069403
294 Vastmanland Sweden 2298 166 0 0.072237
295 Kamchatka Krai Russia 2293 28 1073 0.012211
296 Cajamarca Peru 2292 74 0 0.032286
297 Baleares Spain 2220 224 1533 0.100901
298 Sormland Sweden 2214 244 0 0.110208
299 Kharkiv Oblast Ukraine 2207 84 1137 0.038061
300 Campeche Mexico 2092 223 1642 0.106597
301 Durango Mexico 2080 149 1444 0.071635
302 Queretaro Mexico 2052 296 1501 0.144250
303 Adygea Republic Russia 1981 14 1363 0.007067
304 Madre de Dios Peru 1958 42 0 0.021450
305 Ternopil Oblast Ukraine 1958 29 1177 0.014811
306 Khakassia Republic Russia 1935 15 1361 0.007752
307 Magdalena Colombia 1903 134 789 0.070415
308 Vinnytsia Oblast Ukraine 1895 30 1250 0.015831
309 Sachsen-Anhalt Germany 1880 59 1749 0.031383
310 Ayacucho Peru 1865 26 0 0.013941
311 Osaka Japan 1851 86 1712 0.046461
312 Cusco Peru 1841 19 0 0.010320
313 Nayarit Mexico 1840 209 1228 0.113587
314 Arica y Parinacota Chile 1832 21 1408 0.011463
315 Amur Oblast Russia 1765 13 842 0.007365
316 Kemerovo Oblast Russia 1744 9 788 0.005161
317 Chechen Republic Russia 1736 23 1198 0.013249
318 Odessa Oblast Ukraine 1736 24 761 0.013825
319 Los Lagos Chile 1734 21 1415 0.012111
320 Kalmykia Republic Russia 1717 23 1075 0.013395
321 Murcia Spain 1693 148 2180 0.087419
322 Choco Colombia 1682 49 586 0.029132
323 Dalarna Sweden 1681 157 0 0.093397
324 Bremen Germany 1678 53 1560 0.031585
325 Vologda Oblast Russia 1646 17 1224 0.010328
326 Guangdong China 1642 8 1629 0.004872
327 Baja California Sur Mexico 1631 79 1266 0.048437
328 Kostroma Oblast Russia 1612 21 955 0.013027
329 Wyoming US 1550 20 0 0.012903
330 Halland Sweden 1538 72 0 0.046814
331 Gilgit-Baltistan Pakistan 1524 28 1173 0.018373
332 Kanagawa Japan 1520 96 1338 0.063158
333 Goa India 1482 4 734 0.002699
334 Magallanes Chile 1465 15 1326 0.010239
335 Udmurt Republic Russia 1459 16 1069 0.010966
336 Umbria Italy 1444 80 1353 0.055402
337 Tripura India 1435 1 1146 0.000697
338 Zhytomyr Oblast Ukraine 1421 30 849 0.021112
339 Vasternorrland Sweden 1403 117 0 0.083393
340 Karelia Republic Russia 1378 3 588 0.002177
341 Sucre Colombia 1373 75 133 0.054625
342 Sardegna Italy 1368 133 1222 0.097222
343 Norrbotten Sweden 1344 67 0 0.049851
344 Manipur India 1279 0 617 0.000000
345 Henan China 1276 22 1254 0.017241
346 Meta Colombia 1271 16 976 0.012589
347 Zhejiang China 1269 1 1267 0.000788
348 Hokkaido Japan 1266 101 1061 0.079779
349 Hong Kong China 1242 7 1120 0.005636
350 Vermont US 1227 56 0 0.045640
351 Valle d'Aosta Italy 1195 146 1046 0.122176
352 Calabria Italy 1181 97 1059 0.082134
353 Saitama Japan 1166 65 960 0.055746
354 Azad Jammu and Kashmir Pakistan 1160 33 686 0.028448
355 Atacama Chile 1135 2 790 0.001762
356 Pasco Peru 1133 23 0 0.020300
357 Dnipropetrovsk Oblast Ukraine 1095 23 935 0.021005
358 Montana US 1083 22 0 0.020314
359 Cesar Colombia 1071 27 475 0.025210
360 Kronoberg Sweden 1068 96 0 0.089888
361 Queensland Australia 1067 6 1054 0.005623
362 Nova Scotia Canada 1064 63 998 0.059211
363 Jamtland Harjedalen Sweden 1039 54 0 0.051973
364 Tolima Colombia 1021 22 340 0.021548
365 Hunan China 1019 4 1015 0.003925
366 Alaska US 1014 14 0 0.013807
367 Himachal Pradesh India 1014 10 628 0.009862
368 Anhui China 991 6 985 0.006054
369 Ladakh India 990 1 730 0.001010
370 Puno Peru 989 25 0 0.025278
371 Moquegua Peru 983 15 0 0.015259
372 Chiba Japan 982 45 873 0.045825
373 Zacatecas Mexico 973 101 649 0.103803
374 Varmland Sweden 961 69 0 0.071800
375 Tacna Peru 960 9 0 0.009375
376 Heilongjiang China 947 13 934 0.013728
377 Hawaii US 946 18 0 0.019027
378 Jiangxi China 932 1 931 0.001073
379 Huancavelica Peru 927 12 0 0.012945
380 Beijing China 925 9 592 0.009730
381 Fukuoka Japan 854 33 801 0.038642
382 Sakhalin Oblast Russia 853 0 339 0.000000
383 Kurgan Oblast Russia 834 1 455 0.001199
384 Magadan Oblast Russia 833 4 421 0.004802
385 Khmelnytskyi Oblast Ukraine 808 18 558 0.022277
386 Mecklenburg-Vorpommern Germany 803 20 769 0.024907
387 Puducherry India 802 12 331 0.014963
388 Cordoba Colombia 801 59 241 0.073658
389 Saskatchewan Canada 795 14 701 0.017610
390 Shandong China 792 7 785 0.008838
391 Santander Colombia 757 15 204 0.019815
392 Altai Republic Russia 745 1 314 0.001342
393 Vasterbotten Sweden 722 31 0 0.042936
394 Shanghai China 714 7 678 0.009804
395 Hyogo Japan 709 45 655 0.063470
396 Los Rios Chile 708 10 614 0.014124
397 Crimea Republic* Ukraine 698 11 517 0.015759
398 Kalmar Sweden 674 57 0 0.084570
399 Jiangsu China 654 0 654 0.000000
400 Kirovohrad Oblast Ukraine 650 31 551 0.047692
401 Cherkasy Oblast Ukraine 645 26 477 0.040310
402 Western Australia Australia 611 9 598 0.014730
403 Colima Mexico 603 78 386 0.129353
404 Sichuan China 595 3 581 0.005042
405 Chongqing China 582 6 574 0.010309
406 Zaporizhia Oblast Ukraine 576 17 442 0.029514
407 Channel Islands United Kingdom 571 47 512 0.082312
408 Apurimac Peru 569 19 0 0.033392
409 Donetsk Oblast Ukraine 562 9 167 0.016014
410 Chernihiv Oblast Ukraine 547 11 145 0.020110
411 Risaralda Colombia 546 13 284 0.023810
412 Reunion France 531 2 472 0.003766
413 Aichi Japan 523 34 478 0.065010
414 Nagaland India 501 0 182 0.000000
415 La Guajira Colombia 479 21 108 0.043841
416 Chandigarh India 450 6 389 0.013333
417 Blekinge Sweden 449 14 0 0.031180
418 Mykolaiv Oblast Ukraine 447 10 288 0.022371
419 Molise Italy 445 23 401 0.051685
420 South Australia Australia 443 4 436 0.009029
421 Jewish Autonomous Okrug Russia 430 5 353 0.011628
422 Cauca Colombia 405 18 161 0.044444
423 Basilicata Italy 402 27 373 0.067164
424 Boyaca Colombia 390 17 228 0.043590
425 Kyoto Japan 386 18 346 0.046632
426 Fujian China 363 1 359 0.002755
427 Hebei China 349 6 334 0.017192
428 Norte de Santander Colombia 342 20 164 0.058480
429 Huila Colombia 340 14 252 0.041176
430 Isle of Man United Kingdom 336 24 312 0.071429
431 Manitoba Canada 325 7 302 0.021538
432 Port Quarantine Japan 323 1 194 0.003096
433 Poltava Oblast Ukraine 320 13 273 0.040625
434 Shaanxi China 320 3 310 0.009375
435 Sumy Oblast Ukraine 302 5 237 0.016556
436 Ishikawa Japan 300 27 257 0.090000
437 Guam US 280 5 0 0.017857
438 Caldas Colombia 264 8 181 0.030303
439 Newfoundland and Labrador Canada 261 3 258 0.011494
440 Guangxi China 254 2 252 0.007874
441 Martinique France 242 14 98 0.057851
442 Inner Mongolia China 238 1 237 0.004202
443 Sevastopol* Ukraine 236 4 185 0.016949
444 Dadra and Nagar Haveli and Daman and Diu India 230 0 89 0.000000
445 Tasmania Australia 228 13 215 0.057018
446 Toyama Japan 228 22 205 0.096491
447 Cayman Islands United Kingdom 201 1 194 0.004975
448 Shanxi China 198 0 198 0.000000
449 Tianjin China 198 3 194 0.015152
450 Arunachal Pradesh India 195 1 66 0.005128
451 Kherson Oblast Ukraine 194 3 179 0.015464
452 Faroe Islands Denmark 187 0 187 0.000000
453 Yunnan China 186 2 183 0.010753
454 Guadeloupe France 182 14 157 0.076923
455 Gibraltar United Kingdom 178 0 176 0.000000
456 Ibaraki Japan 176 10 159 0.056818
457 Hainan China 171 6 165 0.035088
458 Quindio Colombia 167 5 117 0.029940
459 Hiroshima Japan 166 3 163 0.018072
460 New Brunswick Canada 165 2 160 0.012121
461 Gansu China 164 2 153 0.012195
462 Ceuta Spain 163 4 163 0.024540
463 Gotland Sweden 162 6 0 0.037037
464 Mizoram India 162 0 126 0.000000
465 Gifu Japan 160 7 147 0.043750
466 Liaoning China 156 2 148 0.012821
467 Jilin China 155 2 153 0.012903
468 Gunma Japan 153 19 131 0.124183
469 Guizhou China 147 2 145 0.013605
470 Bermuda United Kingdom 146 9 135 0.061644
471 Okinawa Japan 145 7 138 0.048276
472 Chukotka Autonomous Okrug Russia 138 1 130 0.007246
473 Melilla Spain 126 2 125 0.015873
474 Fukui Japan 122 8 114 0.065574
475 Grand Princess Canada 116 3 13 0.025862
476 Andaman and Nicobar Islands India 109 0 51 0.000000
477 Australian Capital Territory Australia 108 3 105 0.027778
478 Nenets Autonomous Okrug Russia 107 0 38 0.000000
479 Aruba Netherlands 104 3 98 0.028846
480 Shiga Japan 103 1 99 0.009709
481 Sikkim India 102 0 53 0.000000
482 Miyagi Japan 95 1 88 0.010526
483 Nara Japan 92 2 90 0.021739
484 Virgin Islands US 92 6 0 0.065217
485 Tochigi Japan 84 0 66 0.000000
486 Luhansk Oblast Ukraine 83 0 59 0.000000
487 Niigata Japan 83 0 82 0.000000
488 Shizuoka Japan 83 1 79 0.012048
489 Ehime Japan 82 5 77 0.060976
490 Fukushima Japan 82 0 81 0.000000
491 Arauca Colombia 80 0 4 0.000000
492 Sint Maarten Netherlands 77 15 62 0.194805
493 Nagano Japan 76 0 76 0.000000
494 Xinjiang China 76 3 73 0.039474
495 Ningxia China 75 0 75 0.000000
496 Yamanashi Japan 75 1 72 0.013333
497 Kochi Japan 74 3 71 0.040541
498 Yamagata Japan 69 0 68 0.000000
499 Casanare Colombia 67 0 47 0.000000
500 Wakayama Japan 64 3 60 0.046875
501 French Polynesia France 62 0 60 0.000000
502 Oita Japan 60 1 59 0.016667
503 Meghalaya India 56 1 42 0.017857
504 Diamond Princess Canada 49 1 0 0.020408
505 Kumamoto Japan 48 3 45 0.062500
506 Aysen Chile 46 0 29 0.000000
507 Macau China 46 0 45 0.000000
508 Mie Japan 45 1 44 0.022222
509 Saga Japan 45 0 45 0.000000
510 Turks and Caicos Islands United Kingdom 44 2 11 0.045455
511 St Martin France 43 3 37 0.069767
512 Caqueta Colombia 39 2 22 0.051282
513 Guaviare Colombia 39 0 7 0.000000
514 Yamaguchi Japan 37 0 37 0.000000
515 Northern Mariana Islands US 31 2 0 0.064516
516 Aomori Japan 30 1 26 0.033333
517 Northern Territory Australia 30 0 29 0.000000
518 Kagawa Japan 28 0 28 0.000000
519 Okayama Japan 28 0 25 0.000000
520 Putumayo Colombia 28 6 9 0.214286
521 Vaupes Colombia 28 0 17 0.000000
522 Prince Edward Island Canada 27 0 27 0.000000
523 Shimane Japan 24 0 24 0.000000
524 Curacao Netherlands 23 1 19 0.043478
525 San Andres y Providencia Colombia 23 1 18 0.043478
526 Kagoshima Japan 21 0 11 0.000000
527 New Caledonia France 21 0 21 0.000000
528 Qinghai China 18 0 18 0.000000
529 Miyazaki Japan 17 0 17 0.000000
530 Nagasaki Japan 17 1 16 0.058824
531 Akita Japan 16 0 16 0.000000
532 Guainia Colombia 14 1 2 0.071429
533 Falkland Islands (Malvinas) United Kingdom 13 0 13 0.000000
534 Greenland Denmark 13 0 13 0.000000
535 Montserrat United Kingdom 11 1 10 0.090909
536 Yukon Canada 11 0 11 0.000000
537 British Virgin Islands United Kingdom 8 1 7 0.125000
538 Bonaire, Sint Eustatius and Saba Netherlands 7 0 7 0.000000
539 Saint Barthelemy France 6 0 6 0.000000
540 Tokushima Japan 6 1 4 0.166667
541 Northwest Territories Canada 5 0 5 0.000000
542 Tottori Japan 4 0 3 0.000000
543 Anguilla United Kingdom 3 0 3 0.000000
544 Saint Pierre and Miquelon France 1 0 1 0.000000
545 Tibet China 1 0 1 0.000000
546 Vichada Colombia 1 0 1 0.000000
In [59]:
nan_indices = [] 

# handle nan if there is any, it is usually a float: float('nan')

for i in range(len(unique_provinces)):
    if type(unique_provinces[i]) == float:
        nan_indices.append(i)

unique_provinces = list(unique_provinces)
province_confirmed_cases = list(province_confirmed_cases)

for i in nan_indices:
    unique_provinces.pop(i)
    province_confirmed_cases.pop(i)

Bar Chart Visualizations for COVID-19

In [60]:
us_confirmed = latest_data[latest_data['Country_Region']=='US']['Confirmed'].sum()
outside_us_confirmed = np.sum(country_confirmed_cases) - us_confirmed
plt.figure(figsize=(16, 9))
plt.barh('United States', us_confirmed)
plt.barh('Outside United States', outside_us_confirmed)
plt.title('# of Coronavirus Confirmed Cases', size=20)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()
In [61]:
print('Outside United States {} cases:'.format(outside_us_confirmed))
print('United States {} cases'.format(us_confirmed))
print('Total: {} cases'.format(us_confirmed+outside_us_confirmed))
Outside United States 8129860 cases:
United States 2739879 cases
Total: 10869739 cases
In [62]:
# Only show 15 countries with the most confirmed cases, the rest are grouped into the other category
visual_unique_countries = [] 
visual_confirmed_cases = []
others = np.sum(country_confirmed_cases[15:])

for i in range(len(country_confirmed_cases[:15])):
    visual_unique_countries.append(unique_countries[i])
    visual_confirmed_cases.append(country_confirmed_cases[i])
    
visual_unique_countries.append('Others')
visual_confirmed_cases.append(others)

Visual Representations (bar charts and pie charts)

In [63]:
def plot_bar_graphs(x, y, title):
    plt.figure(figsize=(16, 9))
    plt.barh(x, y)
    plt.title(title, size=20)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()
In [64]:
plot_bar_graphs(visual_unique_countries, visual_confirmed_cases, '# of Covid-19 Confirmed Cases in Countries/Regions')
In [65]:
log_country_confirmed_cases = [math.log10(i) for i in visual_confirmed_cases]
plot_bar_graphs(visual_unique_countries, log_country_confirmed_cases, 'Common Log # of Coronavirus Confirmed Cases in Countries/Regions')
In [66]:
# Only show 15 provinces with the most confirmed cases, the rest are grouped into the other category
visual_unique_provinces = [] 
visual_confirmed_cases2 = []
others = np.sum(province_confirmed_cases[15:])
for i in range(len(province_confirmed_cases[:15])):
    visual_unique_provinces.append(unique_provinces[i])
    visual_confirmed_cases2.append(province_confirmed_cases[i])

visual_unique_provinces.append('Others')
visual_confirmed_cases2.append(others)
In [67]:
plot_bar_graphs(visual_unique_provinces, visual_confirmed_cases2, '# of Coronavirus Confirmed Cases in Provinces/States')
In [68]:
log_province_confirmed_cases = [math.log10(i) for i in visual_confirmed_cases2]
plot_bar_graphs(visual_unique_provinces, log_province_confirmed_cases, 'Log of # of Coronavirus Confirmed Cases in Provinces/States')

Pie Chart Visualizations for COVID-19

In [69]:
def plot_pie_charts(x, y, title):
    c = random.choices(list(mcolors.CSS4_COLORS.values()),k = len(unique_countries))
    plt.figure(figsize=(20,15))
    plt.title(title, size=20)
    plt.pie(y, colors=c)
    plt.legend(x, loc='best', fontsize=15)
    plt.show()
In [70]:
plot_pie_charts(visual_unique_countries, visual_confirmed_cases, 'Covid-19 Confirmed Cases per Country')
In [71]:
plot_pie_charts(visual_unique_provinces, visual_confirmed_cases2, 'Covid-19 Confirmed Cases per State/Province/Region')
In [72]:
# Plotting countries with regional data using a pie chart 

def plot_pie_country_with_regions(country_name, title):
    regions = list(latest_data[latest_data['Country_Region']==country_name]['Province_State'].unique())
    confirmed_cases = []
    no_cases = [] 
    for i in regions:
        cases = latest_data[latest_data['Province_State']==i]['Confirmed'].sum()
        if cases > 0:
            confirmed_cases.append(cases)
        else:
            no_cases.append(i)

    # remove areas with no confirmed cases
    for i in no_cases:
        regions.remove(i)

    # only show the top 5 states
    regions = [k for k, v in sorted(zip(regions, confirmed_cases), key=operator.itemgetter(1), reverse=True)]

    for i in range(len(regions)):
        confirmed_cases[i] = latest_data[latest_data['Province_State']==regions[i]]['Confirmed'].sum()  
    
    # additional province/state will be considered "others"
    
    if(len(regions)>5):
        regions_5 = regions[:5]
        regions_5.append('Others')
        confirmed_cases_5 = confirmed_cases[:5]
        confirmed_cases_5.append(np.sum(confirmed_cases[5:]))
        plot_pie_charts(regions_5,confirmed_cases_5, title)
    else:
        plot_pie_charts(regions,confirmed_cases, title)
In [73]:
plot_pie_country_with_regions('US', 'COVID-19 Confirmed Cases in the United States')
In [74]:
plot_pie_country_with_regions('China', 'COVID-19 Confirmed Cases in China')
In [75]:
plot_pie_country_with_regions('Canada', 'COVID-19 Confirmed Cases in Canada')
In [76]:
plot_pie_country_with_regions('Australia', 'COVID-19 Confirmed Cases in Australia')
In [77]:
plot_pie_country_with_regions('Italy', 'COVID-19 Confirmed Cases in Italy')
In [78]:
plot_pie_country_with_regions('Germany', 'COVID-19 Confirmed Cases in Germany')
In [79]:
plot_pie_country_with_regions('France', 'COVID-19 Confirmed Cases in France')
In [80]:
plot_pie_country_with_regions('Brazil', 'COVID-19 Confirmed Cases in Brazil')
In [81]:
plot_pie_country_with_regions('Peru', 'COVID-19 Confirmed Cases in Peru')
In [82]:
plot_pie_country_with_regions('Mexico', 'COVID-19 Confirmed Cases in Mexico')

US Medical Data on Testing and Hosptialization

In [83]:
# Replace nan with 0
us_medical_data.fillna(value=0, inplace=True)

def plot_us_medical_data():
    states = us_medical_data['Province_State'].unique()
    testing_number = []
    hospitalization_number = [] 
    testing_rate = []
    hospitalization_rate = []
    
    for i in states:
        testing_number.append(us_medical_data[us_medical_data['Province_State']==i]['People_Tested'].sum())
        hospitalization_number.append(us_medical_data[us_medical_data['Province_State']==i]['People_Hospitalized'].sum())
        testing_rate.append(us_medical_data[us_medical_data['Province_State']==i]['Testing_Rate'].max())
        hospitalization_rate.append(us_medical_data[us_medical_data['Province_State']==i]['Hospitalization_Rate'].max())
    
    # only show the top 15 states
    testing_states = [k for k, v in sorted(zip(states, testing_number), key=operator.itemgetter(1), reverse=True)]
    hospitalization_states = [k for k, v in sorted(zip(states, hospitalization_number), key=operator.itemgetter(1), reverse=True)]
    testing_rate_states = [k for k, v in sorted(zip(states, testing_rate), key=operator.itemgetter(1), reverse=True)]
    hospitalization_rate_states = [k for k, v in sorted(zip(states, hospitalization_rate), key=operator.itemgetter(1), reverse=True)]
    
    for i in range(len(states)):
        testing_number[i] = us_medical_data[us_medical_data['Province_State']==testing_states[i]]['People_Tested'].sum()
        hospitalization_number[i] = us_medical_data[us_medical_data['Province_State']==hospitalization_states[i]]['People_Hospitalized'].sum()
        testing_rate[i] = us_medical_data[us_medical_data['Province_State']==testing_rate_states[i]]['Testing_Rate'].sum()
        hospitalization_rate[i] = us_medical_data[us_medical_data['Province_State']==hospitalization_rate_states[i]]['Hospitalization_Rate'].sum()
    
    plot_bar_graphs(testing_states[:25], testing_number[:25], 'Total Testing per State')
    plot_bar_graphs(hospitalization_states[:25], hospitalization_number[:25], 'Total Hospitalization per State')
    plot_bar_graphs(testing_rate_states[:25], testing_rate[:25], 'Testing Rate per 100,000 People (Johns Hopkins)')
    plot_bar_graphs(hospitalization_rate_states[:25], hospitalization_rate[:25], 'Hospitalization Rate per State (Johns Hopkins)')
    
plot_us_medical_data()